#loading Libraries
library(readxl)
library(openxlsx)
library(tidyr)
library(stringr)
library(tibble)
library(car)
## Loading required package: carData
library(showtext)
## Loading required package: sysfonts
## Loading required package: showtextdb
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(CCP)
library(readxl)
library(ggcorrplot)
## Loading required package: ggplot2
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot)
## corrplot 0.92 loaded
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loaded glmnet 4.1-6
library(factoextra)
library(FactoMineR)
require(FactoMineR)
library(ggplot2)
require(factoextra)
# Defining data ----
# Directory holding the two spreadsheets. The files are read through full
# paths, so the script neither changes the session's working directory nor
# wipes the workspace it is run from.
data_dir <- "/Users/jettadler/Desktop/Data /Study"
stl_raw <- read_xls(file.path(data_dir, "steel_data.xls"))
al_raw <- read_xls(file.path(data_dir, "Al_data.xls"))

# Removing blank rows and the name/number column.
# Every odd-numbered row (1, 3, 5, ...) is dropped. The rows to keep are
# derived from each table's own row count instead of a fixed 1..974 range, so
# the selection is always in range for both files and still covers a table
# that grows beyond 974 rows.
keep_even_rows <- function(df) {
  df[seq_len(nrow(df)) %% 2L == 0L, ]
}
stl <- keep_even_rows(stl_raw)
al <- keep_even_rows(al_raw)

# Drop the first column of each table.
stl <- stl[-1]
al <- al[-1]
# Separating data into elemental compositions, strain, and crack-length data.
# Single-bracket indexing by position keeps every piece as a table, including
# the one-column responses.
stEl  <- stl[1:16]
stSTN <- stl[21]
stTCL <- stl[22]
alEl  <- al[1:13]
alSTN <- al[18]
alMCL <- al[19]
# First visual check of possible correlations between variables: pairwise
# correlation matrices for both tables, with the aluminium one drawn as a
# circle plot.
cors <- cor(x = stl)
cora <- cor(x = al)
# corrplot(cors, method = "number")
corrplot(corr = cora, method = "circle")

print(cors)
## C Si Mn P S Cr
## C 1.00000000 0.27845666 -0.07746384 0.12421010 0.23829076 0.30252074
## Si 0.27845666 1.00000000 0.54288940 0.24403814 0.38630488 -0.08870044
## Mn -0.07746384 0.54288940 1.00000000 -0.01076547 0.04041910 -0.08914258
## P 0.12421010 0.24403814 -0.01076547 1.00000000 0.28347200 -0.26479798
## S 0.23829076 0.38630488 0.04041910 0.28347200 1.00000000 0.03644738
## Cr 0.30252074 -0.08870044 -0.08914258 -0.26479798 0.03644738 1.00000000
## Ni 0.21548779 -0.24832062 -0.14784543 -0.28337310 -0.16648652 0.34011443
## Mo -0.09549286 -0.13819168 -0.02845569 0.23045744 0.01192515 -0.34391819
## N -0.02481124 0.22103907 0.52387534 -0.11579085 0.02805146 0.29194807
## Nb -0.12579354 -0.20581200 0.01201195 -0.20526057 -0.13866867 -0.09773600
## Co -0.09669215 -0.05572159 0.01495095 0.18950145 0.11394630 -0.14301930
## Cu -0.02918592 -0.13300644 -0.15206014 -0.11422006 -0.09795553 -0.07875766
## Al -0.04188047 -0.23260626 -0.22424775 -0.21209939 -0.18421137 0.01428158
## Ti -0.02531531 -0.24281444 -0.19008803 -0.12976077 -0.28308274 -0.13446150
## V 0.10629471 -0.18519455 0.13304538 -0.21233624 -0.01149909 -0.15701641
## B 0.06647661 -0.08702766 0.07066607 -0.06543854 -0.10817779 -0.22581597
## Th 0.39039469 0.05270954 -0.33500554 0.18192163 0.23337489 0.27515468
## I 0.46477037 0.11991791 -0.36261756 0.32546704 0.35347395 0.20792318
## U -0.04636266 -0.09558830 -0.17551355 -0.09585409 -0.05185645 0.42670047
## Ve -0.08895166 0.19076415 0.15947449 0.20161899 0.18986257 -0.40924212
## Strain -0.22771168 -0.10258401 0.04880527 -0.02243995 -0.18159471 -0.11390472
## TCL 0.14411528 -0.03647010 -0.13099629 0.01561729 -0.11962175 0.03810476
## Ni Mo N Nb Co Cu
## C 0.21548779 -0.09549286 -0.02481124 -0.12579354 -0.09669215 -0.029185924
## Si -0.24832062 -0.13819168 0.22103907 -0.20581200 -0.05572159 -0.133006445
## Mn -0.14784543 -0.02845569 0.52387534 0.01201195 0.01495095 -0.152060141
## P -0.28337310 0.23045744 -0.11579085 -0.20526057 0.18950145 -0.114220062
## S -0.16648652 0.01192515 0.02805146 -0.13866867 0.11394630 -0.097955535
## Cr 0.34011443 -0.34391819 0.29194807 -0.09773600 -0.14301930 -0.078757663
## Ni 1.00000000 -0.10408396 -0.21280034 -0.07952737 -0.13510072 0.073374902
## Mo -0.10408396 1.00000000 0.02217061 -0.13280162 0.10478750 0.110931108
## N -0.21280034 0.02217061 1.00000000 -0.10490213 0.05665789 -0.025480073
## Nb -0.07952737 -0.13280162 -0.10490213 1.00000000 0.03159128 0.167369769
## Co -0.13510072 0.10478750 0.05665789 0.03159128 1.00000000 -0.018090233
## Cu 0.07337490 0.11093111 -0.02548007 0.16736977 -0.01809023 1.000000000
## Al 0.51472603 -0.14673019 -0.20282819 -0.12249223 -0.09141180 0.146781527
## Ti 0.45899169 -0.07394976 -0.33377034 -0.13915043 -0.15716960 0.167676563
## V 0.16933219 0.17510812 -0.06142697 0.07451321 -0.06661316 0.002611036
## B 0.20741000 0.23308727 -0.17500646 -0.02469812 -0.08367700 -0.078963615
## Th -0.05261927 0.02448561 -0.01815577 -0.18477329 -0.21509654 -0.117651859
## I -0.09658742 0.05869663 -0.09046160 -0.26644977 -0.15674378 -0.029972280
## U -0.01035120 -0.18796257 0.05176411 -0.02213752 -0.18049103 -0.081733434
## Ve -0.14203631 0.14721887 -0.09227065 0.02429409 0.30537700 0.113403144
## Strain -0.23926109 0.03004494 0.09036873 0.12658403 0.10194218 -0.064979957
## TCL 0.41290930 -0.01386736 -0.13861387 -0.12135101 -0.17329686 0.089590436
## Al Ti V B Th
## C -0.04188047 -0.02531531 0.106294712 0.0664766134 0.39039469
## Si -0.23260626 -0.24281444 -0.185194549 -0.0870276593 0.05270954
## Mn -0.22424775 -0.19008803 0.133045383 0.0706660690 -0.33500554
## P -0.21209939 -0.12976077 -0.212336238 -0.0654385438 0.18192163
## S -0.18421137 -0.28308274 -0.011499094 -0.1081777913 0.23337489
## Cr 0.01428158 -0.13446150 -0.157016408 -0.2258159695 0.27515468
## Ni 0.51472603 0.45899169 0.169332190 0.2074099994 -0.05261927
## Mo -0.14673019 -0.07394976 0.175108125 0.2330872675 0.02448561
## N -0.20282819 -0.33377034 -0.061426969 -0.1750064628 -0.01815577
## Nb -0.12249223 -0.13915043 0.074513210 -0.0246981243 -0.18477329
## Co -0.09141180 -0.15716960 -0.066613158 -0.0836769967 -0.21509654
## Cu 0.14678153 0.16767656 0.002611036 -0.0789636148 -0.11765186
## Al 1.00000000 0.43984444 -0.053377976 -0.0535430587 -0.12812092
## Ti 0.43984444 1.00000000 0.239107468 0.3038814520 -0.25461669
## V -0.05337798 0.23910747 1.000000000 0.6001763018 -0.21830014
## B -0.05354306 0.30388145 0.600176302 1.0000000000 -0.22515558
## Th -0.12812092 -0.25461669 -0.218300141 -0.2251555817 1.00000000
## I -0.05461621 -0.20797505 -0.172939417 -0.1962436393 0.89051845
## U -0.11431258 -0.33850179 -0.242033956 -0.2811999863 0.57241764
## Ve -0.03926660 0.06426317 0.309047426 0.2020865864 -0.56769956
## Strain -0.17956990 -0.11546403 -0.080976829 -0.0007026004 -0.13595857
## TCL 0.20227094 0.29650669 0.115898303 0.2297429217 0.01977808
## I U Ve Strain TCL
## C 0.46477037 -0.04636266 -0.088951656 -0.2277116838 0.144115282
## Si 0.11991791 -0.09558830 0.190764151 -0.1025840072 -0.036470102
## Mn -0.36261756 -0.17551355 0.159474494 0.0488052736 -0.130996289
## P 0.32546704 -0.09585409 0.201618986 -0.0224399461 0.015617293
## S 0.35347395 -0.05185645 0.189862565 -0.1815947086 -0.119621747
## Cr 0.20792318 0.42670047 -0.409242123 -0.1139047217 0.038104765
## Ni -0.09658742 -0.01035120 -0.142036314 -0.2392610946 0.412909297
## Mo 0.05869663 -0.18796257 0.147218868 0.0300449419 -0.013867362
## N -0.09046160 0.05176411 -0.092270649 0.0903687260 -0.138613869
## Nb -0.26644977 -0.02213752 0.024294094 0.1265840319 -0.121351014
## Co -0.15674378 -0.18049103 0.305376999 0.1019421843 -0.173296857
## Cu -0.02997228 -0.08173343 0.113403144 -0.0649799572 0.089590436
## Al -0.05461621 -0.11431258 -0.039266597 -0.1795698951 0.202270939
## Ti -0.20797505 -0.33850179 0.064263167 -0.1154640284 0.296506692
## V -0.17293942 -0.24203396 0.309047426 -0.0809768292 0.115898303
## B -0.19624364 -0.28119999 0.202086586 -0.0007026004 0.229742922
## Th 0.89051845 0.57241764 -0.567699564 -0.1359585695 0.019778081
## I 1.00000000 0.40114754 -0.250248230 -0.2020144197 0.036293631
## U 0.40114754 1.00000000 -0.502137485 0.0242652899 -0.033752736
## Ve -0.25024823 -0.50213749 1.000000000 0.0274623077 -0.004297735
## Strain -0.20201442 0.02426529 0.027462308 1.0000000000 0.270733180
## TCL 0.03629363 -0.03375274 -0.004297735 0.2707331797 1.000000000
# Print the aluminium correlation matrix. In the captured output that follows,
# the V and "Q (kJ/cm)" rows and columns are NA everywhere except the diagonal
# - presumably those columns are constant in this data set; TODO confirm.
cora
## Si Fe Cu Mn Mg
## Si 1.00000000 0.41946565 0.30371090 0.69985686 -0.01679518
## Fe 0.41946565 1.00000000 0.01367361 0.16147267 -0.28617836
## Cu 0.30371090 0.01367361 1.00000000 0.41177930 -0.38803728
## Mn 0.69985686 0.16147267 0.41177930 1.00000000 0.14739982
## Mg -0.01679518 -0.28617836 -0.38803728 0.14739982 1.00000000
## Cr 0.28442177 0.11760990 -0.30875105 0.28998191 0.33209597
## Zn 0.16886697 0.18712629 0.32661512 0.23479904 -0.21757192
## Ti 0.14720438 0.04773796 0.01882995 0.20480612 0.53313617
## Zr -0.05186117 -0.18685554 0.48864377 0.11018048 -0.15693241
## V NA NA NA NA NA
## B -0.16233589 -0.31560459 -0.13845711 -0.15064796 0.71706925
## Li -0.17604585 -0.20347178 0.10138874 -0.08560685 -0.11009762
## Al -0.30722007 0.21015028 -0.43847668 -0.56725161 -0.64583767
## U 0.31146101 0.14376163 0.05113820 0.48994437 0.06454867
## I 0.13730297 0.16278287 -0.05140019 -0.20438381 -0.25639121
## Q (kJ/cm) NA NA NA NA NA
## Speed (mm/min) 0.02486073 0.07786063 0.13653407 0.29828949 0.00980461
## Strain 0.03654705 0.03700105 0.07475819 0.01044924 -0.03762740
## MCL -0.02481564 -0.43339428 0.04703515 0.07191182 0.44590688
## Cr Zn Ti Zr V B
## Si 0.28442177 0.16886697 0.14720438 -0.05186117 NA -0.16233589
## Fe 0.11760990 0.18712629 0.04773796 -0.18685554 NA -0.31560459
## Cu -0.30875105 0.32661512 0.01882995 0.48864377 NA -0.13845711
## Mn 0.28998191 0.23479904 0.20480612 0.11018048 NA -0.15064796
## Mg 0.33209597 -0.21757192 0.53313617 -0.15693241 NA 0.71706925
## Cr 1.00000000 0.01412643 0.05551047 -0.09409050 NA -0.11616247
## Zn 0.01412643 1.00000000 0.19682255 0.28933882 NA -0.13367152
## Ti 0.05551047 0.19682255 1.00000000 0.18956819 NA 0.73226155
## Zr -0.09409050 0.28933882 0.18956819 1.00000000 NA -0.03919309
## V NA NA NA NA 1 NA
## B -0.11616247 -0.13367152 0.73226155 -0.03919309 NA 1.00000000
## Li -0.06601020 -0.00622270 -0.03687814 -0.02227177 NA -0.02749633
## Al -0.14139048 -0.08588036 -0.54585787 -0.22784879 NA -0.53310298
## U 0.07318558 -0.29584598 0.07620284 -0.06182329 NA -0.07632594
## I 0.16831816 -0.32416979 -0.19510682 0.13477017 NA -0.27719980
## Q (kJ/cm) NA NA NA NA NA NA
## Speed (mm/min) -0.01094396 0.50716711 0.04443688 -0.05772749 NA -0.07126934
## Strain -0.07356959 0.06596877 0.08489359 0.07694974 NA 0.03121366
## MCL 0.05095885 -0.16555223 0.29152053 0.01311892 NA 0.49887358
## Li Al U I Q (kJ/cm)
## Si -0.176045854 -0.30722007 3.114610e-01 0.13730297 NA
## Fe -0.203471785 0.21015028 1.437616e-01 0.16278287 NA
## Cu 0.101388738 -0.43847668 5.113820e-02 -0.05140019 NA
## Mn -0.085606846 -0.56725161 4.899444e-01 -0.20438381 NA
## Mg -0.110097619 -0.64583767 6.454867e-02 -0.25639121 NA
## Cr -0.066010205 -0.14139048 7.318558e-02 0.16831816 NA
## Zn -0.006222700 -0.08588036 -2.958460e-01 -0.32416979 NA
## Ti -0.036878139 -0.54585787 7.620284e-02 -0.19510682 NA
## Zr -0.022271770 -0.22784879 -6.182329e-02 0.13477017 NA
## V NA NA NA NA NA
## B -0.027496334 -0.53310298 -7.632594e-02 -0.27719980 NA
## Li 1.000000000 -0.05764260 -1.434639e-01 -0.44560128 NA
## Al -0.057642603 1.00000000 -1.436014e-01 0.33418752 NA
## U -0.143463859 -0.14360139 1.000000e+00 -0.11904365 NA
## I -0.445601278 0.33418752 -1.190437e-01 1.00000000 NA
## Q (kJ/cm) NA NA NA NA 1
## Speed (mm/min) 0.500221599 -0.20118505 -3.739558e-03 -0.86067536 NA
## Strain -0.007889434 -0.02267967 -9.099931e-05 -0.02044928 NA
## MCL -0.082222575 -0.43379658 -8.733686e-02 0.01904222 NA
## Speed (mm/min) Strain MCL
## Si 0.024860725 3.654705e-02 -0.02481564
## Fe 0.077860633 3.700105e-02 -0.43339428
## Cu 0.136534070 7.475819e-02 0.04703515
## Mn 0.298289490 1.044924e-02 0.07191182
## Mg 0.009804610 -3.762740e-02 0.44590688
## Cr -0.010943957 -7.356959e-02 0.05095885
## Zn 0.507167108 6.596877e-02 -0.16555223
## Ti 0.044436878 8.489359e-02 0.29152053
## Zr -0.057727490 7.694974e-02 0.01311892
## V NA NA NA
## B -0.071269339 3.121366e-02 0.49887358
## Li 0.500221599 -7.889434e-03 -0.08222257
## Al -0.201185049 -2.267967e-02 -0.43379658
## U -0.003739558 -9.099931e-05 -0.08733686
## I -0.860675362 -2.044928e-02 0.01904222
## Q (kJ/cm) NA NA NA
## Speed (mm/min) 1.000000000 2.002035e-02 -0.22277685
## Strain 0.020020351 1.000000e+00 0.21957716
## MCL -0.222776847 2.195772e-01 1.00000000
# Plotting variables against each other to get a sense of the data's shape.
# All four are base-graphics scatter plots, so axis labels are passed through
# plot() itself; the stray ggplot2 xlab() call that only printed a labels
# object was removed, and the last plot now carries explicit labels like the
# other three.
plot(al$Mg, al$MCL, xlab = "% Mg", ylab = "MCL")

plot(stl$Ni, stl$Strain, xlab = "% Ni", ylab = "Strain")

plot(stl$Ni, stl$TCL, xlab = "%Ni", ylab = "TCL")

plot(al$Fe, al$MCL, xlab = "% Fe", ylab = "MCL")

# Multiple linear regression of Strain on the 16 steel composition columns.
LMstl <- lm(
  Strain ~ C + Si + Mn + P + S + Cr + Ni + Mo + N + Nb + Co + Cu + Al + Ti + V + B,
  data = stl
)
summary(LMstl)
##
## Call:
## lm(formula = Strain ~ C + Si + Mn + P + S + Cr + Ni + Mo + N +
## Nb + Co + Cu + Al + Ti + V + B, data = stl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5433 -1.0243 -0.2570 0.9557 3.2059
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.87275 0.60176 6.436 3.04e-10 ***
## C -1.19395 0.91073 -1.311 0.1905
## Si -0.32091 0.15590 -2.058 0.0401 *
## Mn 0.03211 0.08126 0.395 0.6929
## P -7.40409 7.51020 -0.986 0.3247
## S -31.55089 10.96724 -2.877 0.0042 **
## Cr -0.04012 0.02877 -1.394 0.1638
## Ni -0.02603 0.01140 -2.283 0.0229 *
## Mo -0.05497 0.06677 -0.823 0.4108
## N 1.63127 1.43070 1.140 0.2548
## Nb 0.29512 0.39216 0.753 0.4521
## Co 1.92190 1.47413 1.304 0.1930
## Cu -0.17698 0.12385 -1.429 0.1537
## Al -1.15149 0.49299 -2.336 0.0199 *
## Ti -0.20741 0.42841 -0.484 0.6285
## V -1.23342 0.50128 -2.461 0.0142 *
## B 80.31391 49.08260 1.636 0.1024
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.305 on 470 degrees of freedom
## Multiple R-squared: 0.1675, Adjusted R-squared: 0.1391
## F-statistic: 5.91 on 16 and 470 DF, p-value: 7.467e-12
# In-sample predictions of the steel strain model, plotted against the
# measured strain. The predictions are computed once and reused.
preLMmstl <- predict(LMstl)
plot(stl$Strain, preLMmstl, xlab = "Strain",
     ylab = "Predicted Strain", cex.axis = 1.5,
     cex.lab = 1.5, cex = 1.5, pch = 19)

# Mean squared error. mean() divides by the actual number of observations, so
# the value stays correct if the row count ever differs from the previously
# hard-coded 487.
mse_stlMLR_strn <- mean((stl$Strain - preLMmstl)^2)
# MLR analysis for Strain using the aluminium dataset.
# Mn used to be listed twice in this formula; it is now listed once. The
# captured coefficient table below already shows a single Mn row, so only the
# echoed call changes, not the fit.
LMal <- lm(Strain ~ Si + Mn + Fe + Mg + Cr + Zn + Cu + Al + Ti + V + Zr + B + Li, data = al)
summary(LMal)
##
## Call:
## lm(formula = Strain ~ Si + Mn + Fe + Mg + Cr + Mn + Zn + Cu +
## Al + Ti + V + Zr + B + Li, data = al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0427 -0.5723 -0.3104 0.4602 1.5648
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.517734 22.830605 -0.023 0.982
## Si 1.093791 2.072920 0.528 0.598
## Mn -0.153049 0.511311 -0.299 0.765
## Fe 0.066153 0.767541 0.086 0.931
## Mg 0.004737 0.233416 0.020 0.984
## Cr -0.715096 1.072397 -0.667 0.506
## Zn 0.791344 3.725710 0.212 0.832
## Cu 0.018793 0.246490 0.076 0.939
## Al 0.013137 0.228959 0.057 0.954
## Ti 4.164781 6.154100 0.677 0.499
## V NA NA NA NA
## Zr 1.329972 3.366489 0.395 0.693
## B -32.679515 215.865829 -0.151 0.880
## Li NA NA NA NA
##
## Residual standard error: 0.7917 on 183 degrees of freedom
## Multiple R-squared: 0.02249, Adjusted R-squared: -0.03627
## F-statistic: 0.3827 on 11 and 183 DF, p-value: 0.9616
# NOTE(review): ggplot() is called on the fitted model with no layers added -
# confirm whether a diagnostic plot was intended here.
ggplot(LMal)

# In-sample predictions of the aluminium strain model, plotted against the
# measured strain. The predictions are computed once and reused.
preLMal <- predict(LMal)
plot(al$Strain, preLMal, xlab = "Strain",
     ylab = "Predicted Strain", cex.axis = 1.5,
     cex.lab = 1.5, cex = 1.5, pch = 19)

# Mean squared error over the actual number of observations rather than a
# hard-coded 195.
mse_alMLR_strn <- mean((al$Strain - preLMal)^2)
# Using MLR to predict total crack length (TCL) in the steel dataset from the
# same 16 composition columns.
LMstl_C <- lm(
  TCL ~ C + Si + Mn + P + S + Cr + Ni + Mo + N + Nb + Co + Cu + Al + Ti + V + B,
  data = stl
)
summary(LMstl_C)
##
## Call:
## lm(formula = TCL ~ C + Si + Mn + P + S + Cr + Ni + Mo + N + Nb +
## Co + Cu + Al + Ti + V + B, data = stl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.9291 -2.3798 -0.7128 1.6535 14.1621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.33857 1.66958 1.401 0.161965
## C -1.14612 2.52681 -0.454 0.650338
## Si 1.17301 0.43255 2.712 0.006936 **
## Mn -0.84531 0.22544 -3.750 0.000199 ***
## P 74.07559 20.83690 3.555 0.000416 ***
## S -75.77125 30.42840 -2.490 0.013113 *
## Cr -0.13295 0.07982 -1.666 0.096449 .
## Ni 0.23756 0.03163 7.512 2.97e-13 ***
## Mo -0.28271 0.18525 -1.526 0.127662
## N 9.23596 3.96945 2.327 0.020402 *
## Nb -1.00382 1.08804 -0.923 0.356692
## Co -11.31785 4.08994 -2.767 0.005876 **
## Cu 0.57644 0.34362 1.678 0.094102 .
## Al -1.09992 1.36779 -0.804 0.421711
## Ti 0.51171 1.18862 0.431 0.667023
## V 1.27378 1.39080 0.916 0.360210
## B 350.08885 136.17870 2.571 0.010453 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.621 on 470 degrees of freedom
## Multiple R-squared: 0.2772, Adjusted R-squared: 0.2526
## F-statistic: 11.26 on 16 and 470 DF, p-value: < 2.2e-16
# In-sample predictions of the steel TCL model, plotted against the measured
# TCL. The predictions are computed once and reused.
preLMstl_C <- predict(LMstl_C)
plot(stl$TCL, preLMstl_C, xlab = "TCL",
     ylab = "Predicted TCL", cex.axis = 1.5,
     cex.lab = 1.5, cex = 1.5, pch = 19)

# Mean squared error over the actual number of observations rather than a
# hard-coded 487.
mse_stlMLR_TCL <- mean((stl$TCL - preLMstl_C)^2)
# Using MLR to predict maximum crack length (MCL) in the aluminium dataset.
# Mn used to be listed twice in this formula; it is now listed once. The
# captured coefficient table below already shows a single Mn row, so only the
# echoed call changes, not the fit.
LMal_C <- lm(MCL ~ Si + Mn + Fe + Mg + Cr + Zn + Cu + Al + Ti + V + Zr + B + Li, data = al)
summary(LMal_C)
##
## Call:
## lm(formula = MCL ~ Si + Mn + Fe + Mg + Cr + Mn + Zn + Cu + Al +
## Ti + V + Zr + B + Li, data = al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5886 -1.1537 -0.2303 0.7453 8.3386
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -130.3028 51.3230 -2.539 0.01195 *
## Si 2.8753 4.6599 0.617 0.53799
## Mn 1.4725 1.1494 1.281 0.20178
## Fe -7.2942 1.7254 -4.227 3.73e-05 ***
## Mg 1.3854 0.5247 2.640 0.00900 **
## Cr 6.4416 2.4107 2.672 0.00822 **
## Zn -12.4408 8.3754 -1.485 0.13916
## Cu 1.6765 0.5541 3.026 0.00284 **
## Al 1.3449 0.5147 2.613 0.00972 **
## Ti 7.1976 13.8344 0.520 0.60351
## V NA NA NA NA
## Zr -11.9012 7.5678 -1.573 0.11754
## B 934.4192 485.2644 1.926 0.05571 .
## Li NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.78 on 183 degrees of freedom
## Multiple R-squared: 0.4277, Adjusted R-squared: 0.3933
## F-statistic: 12.43 on 11 and 183 DF, p-value: < 2.2e-16
# In-sample predictions of the aluminium MCL model, plotted against the
# measured MCL. The predictions are computed once and reused.
preLMal_C <- predict(LMal_C)
plot(al$MCL, preLMal_C, xlab = "MCL",
     ylab = "Predicted MCL", cex.axis = 1.5,
     cex.lab = 1.5, cex = 1.5, pch = 19)

# Mean squared error over the actual number of observations rather than a
# hard-coded 195, followed by its square root (RMSE).
mse_alMLR_MCL <- mean((al$MCL - preLMal_C)^2)
sqrt(mse_alMLR_MCL)
## [1] 1.724194
# Feature selection and PCA to predict strain values
# Steel strain model: a glmnet path fit with default settings, followed by a
# cross-validated fit with alpha = 1.
stEl <- as.matrix(stEl)
stSTN <- as.matrix(stSTN)
COMstl <- glmnet(x = stEl, y = stSTN)
plot(COMstl)

# NOTE(review): no set.seed() is visible before cv.glmnet(), so the lambdas
# printed below may differ between runs - confirm.
COMfit_stSTN <- cv.glmnet(x = stEl, y = stSTN, alpha = 1)
plot(COMfit_stSTN)

COMfit_stSTN$lambda.min
## [1] 0.047661
COMfit_stSTN$lambda.1se
## [1] 0.1753152
library(plotmo)
## Loading required package: Formula
## Loading required package: plotrix
## Loading required package: TeachingDemos
plotres(COMfit_stSTN)

# Predictions from the path fit at a fixed penalty of 0.02, compared with the
# measured strain.
# NOTE(review): 0.02 matches neither cross-validated lambda printed above -
# confirm the choice.
pred_stl <- predict(COMstl, newx = stEl, s = 0.02)
stl_s <- as.numeric(stSTN)
stl_t <- pred_stl[, "s1"]
plot(x = stl_s, y = stl_t, xlab = "Measured", ylab = "Predicted")

# Steel TCL model: a glmnet path fit with default settings (for the
# coefficient-path plot), followed by a cross-validated fit with alpha = 0.
stEl <- as.matrix(stEl)
stTCL <- as.matrix(stTCL)
COMstl2 <- glmnet(stEl, stTCL)
plot(COMstl2, label = TRUE)

COMfit_stTCL <- cv.glmnet(stEl, stTCL, alpha = 0)
plot(COMfit_stTCL)

COMfit_stTCL$lambda.min
## [1] 0.2081116
COMfit_stTCL$lambda.1se
## [1] 2.565697
plotres(COMfit_stTCL)

# Predict from the cross-validated fit at its lambda.min. Previously predict()
# was called on the path fit without `s`, which yields one column per lambda
# on the path, and column "s1" was plotted - the second entry of the path
# rather than a tuned penalty.
pred_stl2 <- predict(COMfit_stTCL, newx = stEl, s = "lambda.min")
stl_t2 <- pred_stl2[, 1]
stl_s2 <- as.numeric(stTCL)
plot(stl_s2, stl_t2, xlab = "Measured", ylab = "Predicted")

# Aluminium MCL model: a glmnet path fit with default settings, followed by a
# cross-validated fit with alpha = 0.
alEl <- as.matrix(alEl)
alMCL <- as.matrix(alMCL)
COMal <- glmnet(x = alEl, y = alMCL)
plot(COMal, label = TRUE)

COMfit_al <- cv.glmnet(x = alEl, y = alMCL, alpha = 0)
plot(COMfit_al)

COMfit_al$lambda.min
## [1] 0.1247869
# Coefficients of the cross-validated fit at its lambda.1se.
lam.best <- COMfit_al$lambda.1se
coef(COMfit_al, s = lam.best)
## 14 x 1 sparse Matrix of class "dgCMatrix"
## s1
## (Intercept) 11.89498256
## Si -0.05638343
## Fe -3.06769659
## Cu 0.03939175
## Mn 0.09983304
## Mg 0.08398258
## Cr 0.47693269
## Zn -6.57936523
## Ti 4.79314053
## Zr -0.99161274
## V .
## B 325.69793766
## Li -0.34088924
## Al -0.08699633
plotres(COMfit_al)

# Predictions from the path fit at a fixed penalty of 0.2, compared with the
# measured MCL.
# NOTE(review): these come from COMal at s = 0.2, not from the cross-validated
# fit whose coefficients are printed above - confirm that this is intended.
pred_al <- predict(COMal, newx = alEl, s = 0.2)
al_s2 <- as.numeric(alMCL)
al_t2 <- pred_al[, "s1"]
plot(x = al_s2, y = al_t2, xlab = "Measured", ylab = "Predicted")

# Defining data frames that can be used for PCA: the composition columns plus
# one response column each.
alrf_MCL  <- al[c(1:13, 19)]
alrf_STN  <- al[c(1:13, 18)]
stlrf_TCL <- stl[c(1:16, 22)]
stlrf_STN <- stl[c(1:16, 21)]

# Using PCA to identify sources of variance and, hopefully, insights into
# model simplifications. Each aluminium column is pulled out as a plain
# numeric vector so the PCA input matrices can be assembled with cbind().
Si  <- as.numeric(alrf_MCL[["Si"]])
Fe  <- as.numeric(alrf_MCL[["Fe"]])
Cu  <- as.numeric(alrf_MCL[["Cu"]])
Mn  <- as.numeric(alrf_MCL[["Mn"]])
Mg  <- as.numeric(alrf_MCL[["Mg"]])
Cr  <- as.numeric(alrf_MCL[["Cr"]])
Zn  <- as.numeric(alrf_MCL[["Zn"]])
Ti  <- as.numeric(alrf_MCL[["Ti"]])
Zr  <- as.numeric(alrf_MCL[["Zr"]])
V   <- as.numeric(alrf_MCL[["V"]])
B   <- as.numeric(alrf_MCL[["B"]])
Li  <- as.numeric(alrf_MCL[["Li"]])
Al  <- as.numeric(alrf_MCL[["Al"]])
MCL <- as.numeric(alrf_MCL[["MCL"]])
# Replaces the one-column table of the same name with a numeric vector.
alSTN <- as.numeric(alrf_STN[["Strain"]])
# Column 16 of the aluminium table, kept as a one-column table.
alQ <- al[16]
# PCA for MCL in Al: the input matrix holds the 13 composition vectors plus
# both responses (MCL and strain).
pralT <- cbind(Si, Fe, Cu, Mn, Mg, Cr, Zn, Ti, Zr, V, B, Li, Al, MCL, alSTN)
# Unscaled PCA. The argument is spelled out as `scale. = FALSE`: the earlier
# `scale=F` relied on partial argument matching and on `F`, which can be
# reassigned.
pralPCA <- prcomp(pralT, scale. = FALSE)
summary(pralPCA)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.4235 2.3617 1.8470 0.7452 0.30065 0.25413 0.09614
## Proportion of Variance 0.5468 0.2602 0.1592 0.0259 0.00422 0.00301 0.00043
## Cumulative Proportion 0.5468 0.8070 0.9662 0.9921 0.99628 0.99929 0.99972
## PC8 PC9 PC10 PC11 PC12 PC13
## Standard deviation 0.06368 0.02838 0.02605 0.01585 0.01253 0.0002715
## Proportion of Variance 0.00019 0.00004 0.00003 0.00001 0.00001 0.0000000
## Cumulative Proportion 0.99991 0.99995 0.99998 0.99999 1.00000 1.0000000
## PC14 PC15
## Standard deviation 1.835e-15 5.962e-19
## Proportion of Variance 0.000e+00 0.000e+00
## Cumulative Proportion 1.000e+00 1.000e+00
# Bar chart of the first rotation column (PC1 loadings), then the base-R
# biplot of the same PCA.
barplot(
  height = pralPCA$rotation[, 1],
  main = "PC 1 Loadings Plot",
  las = 2
)

biplot(x = pralPCA)
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length =
## arrow.len): zero-length arrow is of indeterminate angle and so skipped

# Scree plot of the aluminium MCL/strain PCA. showtext, FactoMineR and
# factoextra are already attached by the library() calls at the top of the
# script, so the repeated require()/library() calls and the second, identical
# scree plot were dropped.
fviz_eig(pralPCA)

# Observations, coloured by cos2.
fviz_pca_ind(
  pralPCA,
  col.ind = "cos2", # Color by the quality of representation
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)

# Variables, coloured by their contribution.
fviz_pca_var(
  pralPCA,
  col.var = "contrib", # Color by contributions to the PC
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)

# Combined biplot of observations and variables.
# NOTE(review): this call passes `labels =` while the steel biplots later in
# the file pass `label =` - confirm which spelling is intended.
fviz_pca_biplot(
  pralPCA,
  repel = TRUE,
  labels = TRUE,
  col.var = "#2E9FDF", # Variables color
  col.ind = "#696969" # Individuals color
)

# Reduced MLR for MCL using only Mg, Cu and Al as predictors.
LMal2 <- lm(
  MCL ~ Mg + Cu + Al,
  data = al
)
summary(LMal2)
##
## Call:
## lm(formula = MCL ~ Mg + Cu + Al, data = al)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.7049 -1.1715 -0.3048 0.7032 9.3522
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -121.6902 35.2737 -3.450 0.000690 ***
## Mg 1.8080 0.3707 4.877 2.26e-06 ***
## Cu 1.6661 0.3944 4.224 3.71e-05 ***
## Al 1.2390 0.3542 3.498 0.000583 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.926 on 191 degrees of freedom
## Multiple R-squared: 0.3006, Adjusted R-squared: 0.2897
## F-statistic: 27.37 on 3 and 191 DF, p-value: 9.054e-15
# PCA on the aluminium composition vectors together with strain only.
pralstn <- cbind(Si, Fe, Cu, Mn, Mg, Cr, Zn, Ti, Zr, V, B, Li, Al, alSTN)
# Unscaled PCA; `scale. = FALSE` replaces `scale=F`, which relied on partial
# argument matching and on the reassignable `F`.
pcaalstn <- prcomp(pralstn, scale. = FALSE)

# Scree plot, drawn once (it used to be drawn twice, on either side of the
# loadings bar chart).
fviz_eig(pcaalstn)

# Bar chart of the first rotation column (PC1 loadings).
barplot(pcaalstn$rotation[, 1], main = "PC 1 Loadings Plot", las = 2)

# Observations, coloured by cos2.
fviz_pca_ind(
  pcaalstn,
  col.ind = "cos2", # Color by the quality of representation
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)

# Variables, coloured by their contribution.
fviz_pca_var(
  pcaalstn,
  col.var = "contrib", # Color by contributions to the PC
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)

# Combined biplot of observations and variables.
# NOTE(review): this call passes `labels =` while the steel biplots later in
# the file pass `label =` - confirm which spelling is intended.
fviz_pca_biplot(
  pcaalstn,
  repel = TRUE,
  labels = TRUE,
  col.var = "#2E9FDF", # Variables color
  col.ind = "#696969" # Individuals color
)

# Steel composition columns and both responses as plain numeric vectors, used
# to assemble the steel PCA input matrices with cbind().
sSi <- as.numeric(stlrf_STN$Si)
sC <- as.numeric(stlrf_STN$C)
sCu <- as.numeric(stlrf_STN$Cu)
sMn <- as.numeric(stlrf_STN$Mn)
sMo <- as.numeric(stlrf_STN$Mo)
sCr <- as.numeric(stlrf_STN$Cr)
sN <- as.numeric(stlrf_STN$N)
sTi <- as.numeric(stlrf_STN$Ti)
sP <- as.numeric(stlrf_STN$P)
sV <- as.numeric(stlrf_STN$V)
sB <- as.numeric(stlrf_STN$B)
sNi <- as.numeric(stlrf_STN$Ni)
sAl <- as.numeric(stlrf_STN$Al)
sNb <- as.numeric(stlrf_STN$Nb)
sCo <- as.numeric(stlrf_STN$Co)
sS <- as.numeric(stlrf_STN$S)
STN <- as.numeric(stlrf_STN$Strain)
TCL <- as.numeric(stlrf_TCL$TCL)

# PCA input for steel strain.
# NOTE(review): sNi is defined above but is not part of this matrix - confirm
# whether leaving Ni out of the PCA is deliberate.
prasstl <- cbind(sSi, sC, sCu, sMn, sMo, sCr, sN, sTi, sP, sV, sB, sAl, sNb, sCo, sS, STN)
# Unscaled PCA; `scale. = FALSE` replaces `scale=F`, which relied on partial
# argument matching and on the reassignable `F`.
pcasstl <- prcomp(prasstl, scale. = FALSE)
summary(pcasstl)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.3093 1.4020 1.3052 1.03458 0.53020 0.50246 0.21049
## Proportion of Variance 0.6703 0.1203 0.1043 0.06551 0.01721 0.01545 0.00271
## Cumulative Proportion 0.6703 0.7906 0.8948 0.96035 0.97756 0.99301 0.99572
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.1669 0.14092 0.11861 0.06646 0.04400 0.04068 0.008156
## Proportion of Variance 0.0017 0.00122 0.00086 0.00027 0.00012 0.00010 0.000000
## Cumulative Proportion 0.9974 0.99864 0.99950 0.99977 0.99989 0.99999 1.000000
## PC15 PC16
## Standard deviation 0.005316 0.001222
## Proportion of Variance 0.000000 0.000000
## Cumulative Proportion 1.000000 1.000000
# Bar chart of the first rotation column (PC1 loadings) of the steel strain
# PCA.
barplot(pcasstl$rotation[, 1], main = "PC 1 Loadings Plot", las = 2)

# Scree plot, drawn once. factoextra is already attached at the top of the
# script, so the extra library() call and the repeated scree plot were
# dropped.
fviz_eig(pcasstl)

# Observations, coloured by cos2.
fviz_pca_ind(
  pcasstl,
  col.ind = "cos2", # Color by the quality of representation
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)

# Variables, coloured by their contribution.
fviz_pca_var(
  pcasstl,
  col.var = "contrib", # Color by contributions to the PC
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)

# Combined biplot of observations and variables.
fviz_pca_biplot(
  pcasstl,
  repel = TRUE,
  label = TRUE,
  col.var = "#2E9FDF", # Variables color
  col.ind = "#696969" # Individuals color
)

# PCA input for steel TCL: the same composition vectors as the strain PCA,
# with TCL as the last column.
# NOTE(review): sNi is defined earlier in the file but is not part of this
# matrix - confirm whether leaving Ni out of the PCA is deliberate.
prasstl2 <- cbind(sSi, sC, sCu, sMn, sMo, sCr, sN, sTi, sP, sV, sB, sAl, sNb, sCo, sS, TCL)
# Unscaled PCA; `scale. = FALSE` replaces `scale=F`, which relied on partial
# argument matching and on the reassignable `F`.
pcatstl <- prcomp(prasstl2, scale. = FALSE)
summary(pcatstl)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 4.1983 3.2978 1.29642 1.03397 0.53302 0.50704 0.20541
## Proportion of Variance 0.5525 0.3409 0.05268 0.03351 0.00891 0.00806 0.00132
## Cumulative Proportion 0.5525 0.8933 0.94601 0.97952 0.98842 0.99648 0.99780
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.16650 0.14091 0.12003 0.06655 0.04411 0.04050 0.008163
## Proportion of Variance 0.00087 0.00062 0.00045 0.00014 0.00006 0.00005 0.000000
## Cumulative Proportion 0.99867 0.99929 0.99975 0.99988 0.99995 1.00000 1.000000
## PC15 PC16
## Standard deviation 0.005308 0.001207
## Proportion of Variance 0.000000 0.000000
## Cumulative Proportion 1.000000 1.000000
# Bar chart of the second rotation column of the steel TCL PCA. The title now
# says PC 2 to match the column that is plotted; it previously read "PC 1".
# NOTE(review): if PC1 was the intended component, change the column index
# back to 1 instead.
barplot(pcatstl$rotation[, 2], main = "PC 2 Loadings Plot", las = 2)

# Scree plot.
fviz_eig(pcatstl)

# Observations, coloured by cos2.
fviz_pca_ind(
  pcatstl,
  col.ind = "cos2", # Color by the quality of representation
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)

# Variables, coloured by their contribution.
fviz_pca_var(
  pcatstl,
  col.var = "contrib", # Color by contributions to the PC
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  repel = TRUE # Avoid text overlapping
)

# Combined biplot of observations and variables.
fviz_pca_biplot(
  pcatstl,
  repel = TRUE,
  label = TRUE,
  col.var = "#2E9FDF", # Variables color
  col.ind = "#696969" # Individuals color
)

# Root-mean-square error of the four MLR models: aluminium MCL, aluminium
# strain, steel TCL and steel strain, in that order.
print(sqrt(mse_alMLR_MCL))
## [1] 1.724194
print(sqrt(mse_alMLR_strn))
## [1] 0.7669933
print(sqrt(mse_stlMLR_TCL))
## [1] 3.557559
print(sqrt(mse_stlMLR_strn))
## [1] 1.282243